# -----------------------------------------------------------------------------
# Basic configuration
# -----------------------------------------------------------------------------

# Bot name (used by Scrapy for logging and the default User-Agent).
BOT_NAME = "crawler_bot"

# Where Scrapy looks for spider classes, and where `genspider` creates new ones.
SPIDER_MODULES = ["app.spider"]
NEWSPIDER_MODULE = "app.spider"

# Browser-like User-Agent so requests are not rejected outright.
# Note: DEFAULT_REQUEST_HEADERS (below in this file) also sets a User-Agent,
# and it takes precedence — DefaultHeadersMiddleware applies headers before
# UserAgentMiddleware, which only setdefault()s. Keep the two values identical
# so the effective UA is unambiguous.
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"

# Respect robots.txt.
ROBOTSTXT_OBEY = True

# -----------------------------------------------------------------------------
# Database configuration
# -----------------------------------------------------------------------------
import os

# MySQL connection settings consumed by the database pipeline.
# Credentials can be supplied via environment variables so they need not live
# in source control; the fallbacks preserve the previous hard-coded values, so
# behavior is unchanged when no environment variables are set.
DB_SETTINGS = {
    'host': os.environ.get('GAOKAO_DB_HOST', 'localhost'),      # database host
    'user': os.environ.get('GAOKAO_DB_USER', 'arch'),           # database user
    'password': os.environ.get('GAOKAO_DB_PASSWORD', 'root'),   # database password
    'database': os.environ.get('GAOKAO_DB_NAME', 'gaokao'),     # database name
    'port': int(os.environ.get('GAOKAO_DB_PORT', '3306')),      # TCP port, MySQL default 3306
}

# -----------------------------------------------------------------------------
# Item pipeline configuration
# -----------------------------------------------------------------------------
# Lower number = higher priority: items are validated first, then persisted.
ITEM_PIPELINES = {
    'app.pipeline.validation_pipeline.ValidationPipeline': 300,
    'app.pipeline.database_pipeline.DatabasePipeline': 400,
}

# -----------------------------------------------------------------------------
# Request headers simulating a logged-in browser session
# -----------------------------------------------------------------------------

# Attached to every outgoing request by Scrapy's DefaultHeadersMiddleware.
# NOTE(review): the Cookie value has no "name=" part — HTTP cookies are
# name=value pairs, so the server will likely ignore it as sent; confirm the
# intended cookie name. Also, Scrapy's cookie handling (COOKIES_ENABLED, on by
# default) manages the Cookie header itself and may override this value.
# NOTE(review): when both the USER_AGENT setting and this header are present,
# this header wins, because DefaultHeadersMiddleware applies first.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cookie": "19B3E7E5ACDE2DC7CC7E2B035204F7EA",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
}
